{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "714dd23e-61bb-4e25-b1f6-71a73f188ac9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pysal as ps\n",
    "import geopandas as gpd\n",
    "from sklearn import cluster\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.preprocessing import scale\n",
    "import folium\n",
    "import json\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "sns.set(style=\"whitegrid\")\n",
    "from sklearn.decomposition import PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7339575-352c-406c-b140-0ac09e34febb",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"\\df_1.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ff5205d-04bc-4593-abf9-1ecaa8766ed3",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['education_index'] = np.log1p(df['education_index'])\n",
    "df['life_expectancy'] = np.log1p(df['life_expectancy'])\n",
    "df['ILO'] = np.log1p(df['ILO'])\n",
    "df['labor_productivity'] = np.log1p(df['labor_productivity'])\n",
    "df['labor_protection'] = np.log1p(df['labor_protection'])\n",
    "df_1 = df[['country', 'education_expenditure','education_index','life_expectancy', 'immunization', 'gender','labor_productivity']]\n",
    "df_1 = df_1.set_index('country')\n",
    "df_2 = df[['country','social_spending', 'labor_protection', 'ILO']]\n",
    "df_2 = df_2.set_index('country')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bc7e4da-2665-47c5-aa42-14c7843f541d",
   "metadata": {},
   "source": [
    "# 1. PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78d99296-85b0-4151-8362-3bad02261f2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca_1 = PCA(n_components = 2)\n",
    "printcipalComponents = pca_1.fit_transform(df_1)\n",
    "\n",
    "pca_1 = pca_1.explained_variance_ratio_\n",
    "pca_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2322d5c9-a895-41cf-aaa1-f1e3fad5ae70",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_v = pd.DataFrame(pca_1, index=['PC1','PC2'], columns=[' '])\n",
    "\n",
    "my_colors = ['red','grey']\n",
    "my_explode = (0.1, 0,)\n",
    "wedgeprops={'width': 0.7, 'edgecolor': 'w', 'linewidth': 5}\n",
    "\n",
    "\n",
    "df_v.plot.pie(y = ' ',autopct = '%2.1f%%', startangle=15, colors=my_colors, explode=my_explode,\n",
    "              textprops={'fontsize': 18}, wedgeprops=wedgeprops)\n",
    "#plt.title('PCA 1 components')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74bd09bf-d501-421a-8918-a04723e458e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca_2 = PCA(n_components = 2)\n",
    "printcipalComponents = pca_2.fit_transform(df_2)\n",
    "\n",
    "pca_2 = pca_2.explained_variance_ratio_\n",
    "pca_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf4752fa-b1c5-4802-9f04-e2683ecab4dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_d = pd.DataFrame(pca_2, index=['PC1','PC2'], columns=[' '])\n",
    "\n",
    "my_colors = ['red','grey']\n",
    "my_explode = (0.1, 0)\n",
    "wedgeprops={'width': 0.7, 'edgecolor': 'w', 'linewidth': 5}\n",
    "\n",
    "\n",
    "df_d.plot.pie(y = ' ',autopct = '%1.1f%%', startangle=15, colors=my_colors, explode=my_explode,\n",
    "              textprops={'fontsize': 18}, wedgeprops=wedgeprops)\n",
    "#plt.title('PCA 1 components')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba9198a9-a723-4814-84e5-eb93e83e25bf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "b740a2e6-4878-4909-b673-178f3bb54bf3",
   "metadata": {},
   "source": [
    "# 2. Clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b7582c8-ff1f-4676-8ac2-1ac85265b612",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"\\df_1.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "476d5c24-eae9-45c0-94d2-392a39f90a46",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['education_index'] = np.log1p(df['education_index'])\n",
    "df['life_expectancy'] = np.log1p(df['life_expectancy'])\n",
    "df['ILO'] = np.log1p(df['ILO'])\n",
    "df['labor_productivity'] = np.log1p(df['labor_productivity'])\n",
    "df['labor_protection'] = np.log1p(df['labor_protection'])\n",
    "df_1 = df[['country', 'education_expenditure','education_index','life_expectancy', 'immunization', 'gender','labor_productivity']]\n",
    "df_1 = df_1.set_index('country')\n",
    "df_2 = df[['country','social_spending', 'labor_protection', 'ILO']]\n",
    "df_2 = df_2.set_index('country')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8afae6c2-c6b9-444f-bc1d-5dc0d95247a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca = PCA(n_components=1)\n",
    "df_pca = pca.transform(df_1)\n",
    "pca_columns = ['productive']\n",
    "df_productive = pd.DataFrame(df_pca, columns = pca_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "beb4788a-85a7-403b-8e2e-f59b97267874",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca.fit(df_2)\n",
    "df_pca_2 = pca.transform(df_2)\n",
    "pca_columns = ['protective']\n",
    "df_protective = pd.DataFrame(df_pca_2, columns = pca_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b61a4136-494c-4406-8829-c3e050284b7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = pd.merge(df_productive, df_protective, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3795947-15a0-4808-881c-c2f1b30d53d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = pd.merge(df_merge,df['country'] , left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a50b1f1-3c92-4511-801b-f7283c13a3ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = df_merge.set_index('country')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a5ce776-7759-491f-b97b-264572b0a358",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1aa710d-2c8a-4047-9479-68487e1ea59e",
   "metadata": {},
   "outputs": [],
   "source": [
    "### 여러개의 클러스터링 갯수를 List로 입력 받아 각각의 실루엣 계수를 면적으로 시각화한 함수 작성\n",
    "def visualize_silhouette(cluster_lists, X_features): \n",
    "    \n",
    "    from sklearn.datasets import make_blobs\n",
    "    from sklearn.cluster import KMeans\n",
    "    from sklearn.metrics import silhouette_samples, silhouette_score\n",
    "\n",
    "    import matplotlib.pyplot as plt\n",
    "    import matplotlib.cm as cm\n",
    "    import math\n",
    "    \n",
    "    # 입력값으로 클러스터링 갯수들을 리스트로 받아서, 각 갯수별로 클러스터링을 적용하고 실루엣 개수를 구함\n",
    "    n_cols = len(cluster_lists)\n",
    "    \n",
    "    # plt.subplots()으로 리스트에 기재된 클러스터링 수만큼의 sub figures를 가지는 axs 생성 \n",
    "    fig, axs = plt.subplots(figsize=(4*n_cols, 4), nrows=1, ncols=n_cols)\n",
    "    \n",
    "    # 리스트에 기재된 클러스터링 갯수들을 차례로 iteration 수행하면서 실루엣 개수 시각화\n",
    "    for ind, n_cluster in enumerate(cluster_lists):\n",
    "        \n",
    "        # KMeans 클러스터링 수행하고, 실루엣 스코어와 개별 데이터의 실루엣 값 계산. \n",
    "        clusterer = KMeans(n_clusters = n_cluster, max_iter=500, random_state=0)\n",
    "        cluster_labels = clusterer.fit_predict(X_features)\n",
    "        \n",
    "        sil_avg = silhouette_score(X_features, cluster_labels)\n",
    "        sil_values = silhouette_samples(X_features, cluster_labels)\n",
    "        \n",
    "        y_lower = 10\n",
    "        axs[ind].set_title('Number of Cluster : '+ str(n_cluster)+'\\n' \\\n",
    "                          'Silhouette Score :' + str(round(sil_avg,3)), fontsize=16 )\n",
    "        axs[ind].set_xlabel(\"The silhouette coefficient values\", fontsize=13)\n",
    "        axs[ind].set_ylabel(\"Cluster label\", fontsize=13)\n",
    "        axs[ind].set_xlim([-0.1, 1])\n",
    "        axs[ind].set_ylim([0, len(X_features) + (n_cluster + 1) * 10])\n",
    "        axs[ind].set_yticks([])  # Clear the yaxis labels / ticks\n",
    "        axs[ind].set_xticks([0, 0.2, 0.4, 0.6, 0.8, 1])\n",
    "        \n",
    "        # 클러스터링 갯수별로 fill_betweenx( )형태의 막대 그래프 표현. \n",
    "        for i in range(n_cluster):\n",
    "            ith_cluster_sil_values = sil_values[cluster_labels==i]\n",
    "            ith_cluster_sil_values.sort()\n",
    "            \n",
    "            size_cluster_i = ith_cluster_sil_values.shape[0]\n",
    "            y_upper = y_lower + size_cluster_i\n",
    "            \n",
    "            color = cm.nipy_spectral(float(i) / n_cluster)\n",
    "            axs[ind].fill_betweenx(np.arange(y_lower, y_upper), 0, ith_cluster_sil_values, \\\n",
    "                                facecolor=color, edgecolor=color, alpha=0.7)\n",
    "            axs[ind].text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n",
    "            y_lower = y_upper + 10\n",
    "            \n",
    "        axs[ind].axvline(x=sil_avg, color=\"red\", linestyle=\"--\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36beba12-a5e9-453a-9ea1-d0a6ae385af4",
   "metadata": {},
   "outputs": [],
   "source": [
    "### 여러개의 클러스터링 갯수를 List로 입력 받아 각각의 클러스터링 결과를 시각화 \n",
    "def visualize_kmeans_plot_multi(cluster_lists, X_features):\n",
    "    \n",
    "    from sklearn.cluster import KMeans\n",
    "    import pandas as pd\n",
    "    import numpy as np\n",
    "    \n",
    "    # plt.subplots()으로 리스트에 기재된 클러스터링 만큼의 sub figures를 가지는 axs 생성 \n",
    "    n_cols = len(cluster_lists)\n",
    "    fig, axs = plt.subplots(figsize=(4*n_cols, 5), nrows=1, ncols=n_cols)\n",
    "    \n",
    "    \n",
    "     # 리스트에 기재된 클러스터링 갯수들을 차례로 iteration 수행하면서 KMeans 클러스터링 수행하고 시각화\n",
    "    for ind, n_cluster in enumerate(cluster_lists):\n",
    "        \n",
    "        # KMeans 클러스터링으로 클러스터링 결과를 dataframe에 저장. \n",
    "        clusterer = KMeans(n_clusters = n_cluster, max_iter=500, random_state=0)\n",
    "        cluster_labels = clusterer.fit_predict(df_merge)\n",
    "        df_merge['kmeans_label']=cluster_labels\n",
    "        \n",
    "        unique_labels = np.unique(clusterer.labels_)\n",
    "        markers=['o', 's', '^', 'x', '*', '>', '<', '+']\n",
    "\n",
    "        \n",
    "        # 클러스터링 결과값 별로 scatter plot 으로 시각화\n",
    "        for label in unique_labels:\n",
    "            label_df = df_merge[df_merge['kmeans_label']==label]\n",
    "            if label == -1:\n",
    "                cluster_legend = 'Noise'\n",
    "            else :\n",
    "                cluster_legend = 'Cluster '+str(label)           \n",
    "            axs[ind].scatter(x=label_df['productive'], y=label_df['protective'], s=70,\\\n",
    "                        edgecolor='k', marker=markers[label], label=cluster_legend)\n",
    "        \n",
    "\n",
    "        axs[ind].set_title('Number of Cluster : '+ str(n_cluster), fontsize=16)    \n",
    "        axs[ind].legend(loc='upper right')\n",
    "\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ed10cd1-2da1-4674-b1b8-f5ef2444ce08",
   "metadata": {},
   "outputs": [],
   "source": [
    "visualize_silhouette([2,3,4,5,6],df_merge)\n",
    "visualize_kmeans_plot_multi([2,3,4,5,6],df_merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e20de26-71a0-4fc8-a8a4-2143fdd8168f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = df_merge.set_index('country')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb2e7272-c1f5-4e72-bd35-38233a47621a",
   "metadata": {},
   "source": [
    "# 3. Optimal Clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f5bdca1-5099-4d40-afd8-eb5c388a3822",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"\\df_1.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c8366b5-a5f4-4c8d-b714-c3b0edc34add",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['education_index'] = np.log1p(df['education_index'])\n",
    "df['life_expectancy'] = np.log1p(df['life_expectancy'])\n",
    "df['ILO'] = np.log1p(df['ILO'])\n",
    "df['labor_productivity'] = np.log1p(df['labor_productivity'])\n",
    "df['labor_protection'] = np.log1p(df['labor_protection'])\n",
    "df_1 = df[['country', 'education_expenditure','education_index','life_expectancy', 'immunization', 'gender','labor_productivity']]\n",
    "df_1 = df_1.set_index('country')\n",
    "df_2 = df[['country','social_spending', 'labor_protection', 'ILO']]\n",
    "df_2 = df_2.set_index('country')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f75cb31-30e8-4525-b588-2fb5531a40ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca = PCA(n_components=1)\n",
    "df_pca = pca.transform(df_1)\n",
    "pca_columns = ['productive']\n",
    "df_productive = pd.DataFrame(df_pca, columns = pca_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26ff878a-17e3-4dd7-98eb-25b7d93d339a",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca.fit(df_2)\n",
    "df_pca_2 = pca.transform(df_2)\n",
    "pca_columns = ['protective']\n",
    "df_protective = pd.DataFrame(df_pca_2, columns = pca_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb1ebc9e-3c73-4011-aeef-9328b78c0ce4",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = pd.merge(df_productive, df_protective, left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1672a969-8fc0-4d7f-83eb-d58b41a2ecba",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = pd.merge(df_merge,df['country'] , left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99133a24-c682-4cb1-b05f-af0b9910688d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "658a43f5-bc9f-45c3-9357-4bd09dc96222",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import silhouette_score, silhouette_samples\n",
    "import matplotlib.cm as cm\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "# Assuming df_merge is already defined\n",
    "\n",
    "features = df_merge[['productive', 'protective']]\n",
    "\n",
    "# Fit K-means with both features\n",
    "kmeans_both = KMeans(n_clusters=2, max_iter=500, random_state=0)\n",
    "labels_both = kmeans_both.fit_predict(features)\n",
    "silhouette_both = silhouette_score(features, labels_both)\n",
    "\n",
    "# Fit K-means with 'productive' only\n",
    "kmeans_prod = KMeans(n_clusters=2, max_iter=500, random_state=0)\n",
    "labels_prod = kmeans_prod.fit_predict(df_merge[['productive']])\n",
    "silhouette_prod = silhouette_score(df_merge[['productive']], labels_prod)\n",
    "\n",
    "# Fit K-means with 'protective' only\n",
    "kmeans_prot = KMeans(n_clusters=2, max_iter=500, random_state=0)\n",
    "labels_prot = kmeans_prot.fit_predict(df_merge[['protective']])\n",
    "silhouette_prot = silhouette_score(df_merge[['protective']], labels_prot)\n",
    "\n",
    "# Combined plotting function\n",
    "def plot_clusters_combined():\n",
    "    fig, axes = plt.subplots(2, 3, figsize=(20, 14))\n",
    "    fig.subplots_adjust(hspace=0.3, wspace=0.17)\n",
    "    markers = ['o', 's', '^', 'x', '*', '>', '<', '+']\n",
    "    \n",
    "    # Plot clusters for both features\n",
    "    unique_labels = np.unique(labels_both)\n",
    "    for label in unique_labels:\n",
    "        label_data = features.values[labels_both == label]\n",
    "        axes[0, 0].scatter(label_data[:, 0], label_data[:, 1], \n",
    "                           s=200, edgecolor='k', marker=markers[label % len(markers)], label=f'Cluster {label}')\n",
    "    axes[0, 0].scatter(kmeans_both.cluster_centers_[:, 0], kmeans_both.cluster_centers_[:, 1], \n",
    "                       s=300, color='red', marker='X', label='Centers')\n",
    "    # Calculate and annotate the absolute difference\n",
    "    center_diff_both = np.abs(kmeans_both.cluster_centers_[0] - kmeans_both.cluster_centers_[1])\n",
    "    axes[0, 0].annotate(f'Diff: {center_diff_both[0]:.4f}, {center_diff_both[1]:.4f}', xy=(0.5, 0.5), xycoords='axes fraction', fontsize=22, ha='center', color='red')\n",
    "    axes[0, 0].set_title('(a) Clusters with Both Features\\nSilhouette Score: {:.3f}'.format(silhouette_both), fontsize=18)\n",
    "    axes[0, 0].set_xlabel('Commodification', size=18)\n",
    "    axes[0, 0].set_ylabel('Decommodification', size=18)\n",
    "    axes[0, 0].legend(fontsize=15)\n",
    "    axes[0, 0].set_xlim([-9, 9])\n",
    "    axes[0, 0].set_ylim([-9, 9])\n",
    "    axes[0, 0].tick_params(axis='both', which='major', labelsize=16)\n",
    "\n",
    "    # Plot clusters for 'productive' feature\n",
    "    unique_labels = np.unique(labels_prod)\n",
    "    for label in unique_labels:\n",
    "        label_data = df_merge[['productive', 'productive']].values[labels_prod == label]\n",
    "        axes[0, 1].scatter(label_data[:, 0], label_data[:, 1], \n",
    "                           s=200, edgecolor='k', marker=markers[label % len(markers)], label=f'Cluster {label}')\n",
    "    axes[0, 1].scatter(np.c_[kmeans_prod.cluster_centers_, np.zeros_like(kmeans_prod.cluster_centers_)][:, 0], \n",
    "                       np.c_[kmeans_prod.cluster_centers_, np.zeros_like(kmeans_prod.cluster_centers_)][:, 1], \n",
    "                       s=300, color='red', marker='X', label='Centers')\n",
    "    # Calculate and annotate the absolute difference\n",
    "    center_diff_prod = np.abs(kmeans_prod.cluster_centers_[0] - kmeans_prod.cluster_centers_[1])\n",
    "    axes[0, 1].annotate(f'Diff: {center_diff_prod[0]:.4f}', xy=(0.5, 0.5), xycoords='axes fraction', fontsize=22, ha='right', color='red')\n",
    "    axes[0, 1].set_title('(b) Clusters with Commodification Feature\\nSilhouette Score: {:.3f}'.format(silhouette_prod), fontsize=18)\n",
    "    axes[0, 1].set_xlabel('Commodification', size=18)\n",
    "    axes[0, 1].set_ylabel('Commodification', size=18)\n",
    "    axes[0, 1].legend(fontsize=15)\n",
    "    axes[0, 1].set_xlim([-9, 9])\n",
    "    axes[0, 1].set_ylim([-9, 9])\n",
    "    axes[0, 1].tick_params(axis='both', which='major', labelsize=16)\n",
    "\n",
    "    # Plot clusters for 'protective' feature\n",
    "    unique_labels = np.unique(labels_prot)\n",
    "    for label in unique_labels:\n",
    "        label_data = df_merge[['protective', 'protective']].values[labels_prot == label]\n",
    "        axes[0, 2].scatter(label_data[:, 0], label_data[:, 1], \n",
    "                           s=200, edgecolor='k', marker=markers[label % len(markers)], label=f'Cluster {label}')\n",
    "    axes[0, 2].scatter(np.c_[kmeans_prot.cluster_centers_, np.zeros_like(kmeans_prot.cluster_centers_)][:, 0], \n",
    "                       np.c_[kmeans_prot.cluster_centers_, np.zeros_like(kmeans_prot.cluster_centers_)][:, 1], \n",
    "                       s=300, color='red', marker='X', label='Centers')\n",
    "    # Calculate and annotate the absolute difference\n",
    "    center_diff_prot = np.abs(kmeans_prot.cluster_centers_[0] - kmeans_prot.cluster_centers_[1])\n",
    "    axes[0, 2].annotate(f'Diff: {center_diff_prot[0]:.4f}', xy=(0.5, 0.5), xycoords='axes fraction', fontsize=22, ha='right', color='red')\n",
    "    axes[0, 2].set_title('(c) Clusters with Decommodification Feature\\nSilhouette Score: {:.3f}'.format(silhouette_prot), fontsize=18)\n",
    "    axes[0, 2].set_xlabel('Decommodification', size=18)\n",
    "    axes[0, 2].set_ylabel('Decommodification', size=18)\n",
    "    axes[0, 2].legend(fontsize=15)\n",
    "    axes[0, 2].set_xlim([-9, 9])\n",
    "    axes[0, 2].set_ylim([-9, 9])\n",
    "    axes[0, 2].tick_params(axis='both', which='major', labelsize=16)\n",
    "\n",
    "    # Plot silhouette for both features\n",
    "    plot_silhouette_subplot(features, labels_both, axes[1, 0], 'Silhouette plot for both features\\nSilhouette Score: {:.3f}'.format(silhouette_both))\n",
    "\n",
    "    # Plot silhouette for 'productive' feature\n",
    "    plot_silhouette_subplot(df_merge[['productive']], labels_prod, axes[1, 1], 'Silhouette plot for Commodification feature\\nSilhouette Score: {:.3f}'.format(silhouette_prod))\n",
    "\n",
    "    # Plot silhouette for 'protective' feature\n",
    "    plot_silhouette_subplot(df_merge[['protective']], labels_prot, axes[1, 2], 'Silhouette plot for Decommodification feature\\nSilhouette Score: {:.3f}'.format(silhouette_prot))\n",
    "\n",
    "    plt.show()\n",
    "\n",
    "def plot_silhouette_subplot(data, labels, ax, title):\n",
    "    silhouette_avg = silhouette_score(data, labels)\n",
    "    sample_silhouette_values = silhouette_samples(data, labels)\n",
    "\n",
    "    y_lower = 10\n",
    "    n_clusters = len(np.unique(labels))\n",
    "\n",
    "    ax.set_xlim([-0.1, 1])\n",
    "    ax.set_ylim([0, len(data) + (n_clusters + 1) * 10])\n",
    "\n",
    "    for i in range(n_clusters):\n",
    "        ith_cluster_silhouette_values = sample_silhouette_values[labels == i]\n",
    "        ith_cluster_silhouette_values.sort()\n",
    "\n",
    "        size_cluster_i = ith_cluster_silhouette_values.shape[0]\n",
    "        y_upper = y_lower + size_cluster_i\n",
    "\n",
    "        color = cm.nipy_spectral(float(i) / n_clusters)\n",
    "        ax.fill_betweenx(np.arange(y_lower, y_upper),\n",
    "                         0, ith_cluster_silhouette_values,\n",
    "                         facecolor=color, edgecolor=color, alpha=0.7)\n",
    "\n",
    "        ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n",
    "\n",
    "        y_lower = y_upper + 10\n",
    "\n",
    "    ax.set_title(title, fontsize=18)\n",
    "    ax.set_xlabel(\"Silhouette coefficient values\", size=18)\n",
    "    ax.set_ylabel(\"Cluster label\", size=18)\n",
    "\n",
    "    ax.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n",
    "    ax.set_yticks([])\n",
    "    ax.set_xticks(np.arange(-0.1, 1.1, 0.2))\n",
    "    ax.tick_params(axis='both', which='major', labelsize=16)\n",
    "\n",
    "# Plot all clusters and silhouette plots in a 2x3 grid\n",
    "plot_clusters_combined()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d13e543d-5a24-4217-a973-30684bd43865",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "115e12c3-814a-42c6-9b98-3a279a9b7787",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a78d83de-0178-4e71-9d36-23e6b816c17d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0db2f473-5f05-427a-8982-e23ddba4a961",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
